{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7d687e86-9c08-497b-9018-0f6ec0114937",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-15T02:48:30.656315Z",
     "iopub.status.busy": "2022-05-15T02:48:30.655700Z",
     "iopub.status.idle": "2022-05-15T02:48:31.130753Z",
     "shell.execute_reply": "2022-05-15T02:48:31.130103Z",
     "shell.execute_reply.started": "2022-05-15T02:48:30.656238Z"
    }
   },
   "outputs": [],
   "source": [
    "# 导入包\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7d7b2f0b-6dc4-4b17-a036-c9b5954cb6bc",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-15T02:51:44.003075Z",
     "iopub.status.busy": "2022-05-15T02:51:44.002703Z",
     "iopub.status.idle": "2022-05-15T02:51:44.024948Z",
     "shell.execute_reply": "2022-05-15T02:51:44.024383Z",
     "shell.execute_reply.started": "2022-05-15T02:51:44.003046Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1  key2\n",
       "0  one     1\n",
       "1  two     1\n",
       "2  one     1\n",
       "3  two     2\n",
       "4  one     2\n",
       "5  two     2\n",
       "6  one     3\n",
       "7  two     4\n",
       "8  two     4"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 创建DataFrame对象\n",
    "data = pd.DataFrame(\n",
    "    {\"key1\": [\"one\", \"two\"] * 4 + [\"two\"], \"key2\": [1, 1, 1, 2, 2, 2, 3, 4, 4]}\n",
    ")\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1a33e31f-3949-4c35-88f6-df7ea295554d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-16T00:35:07.084166Z",
     "iopub.status.busy": "2022-05-16T00:35:07.083825Z",
     "iopub.status.idle": "2022-05-16T00:35:07.102660Z",
     "shell.execute_reply": "2022-05-16T00:35:07.101793Z",
     "shell.execute_reply.started": "2022-05-16T00:35:07.084140Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    False\n",
       "1    False\n",
       "2     True\n",
       "3    False\n",
       "4    False\n",
       "5     True\n",
       "6    False\n",
       "7    False\n",
       "8     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 检测重复情况\n",
    "data.duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "f348e4ea-5984-443b-8f96-3265a1e7239e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-16T00:47:52.503392Z",
     "iopub.status.busy": "2022-05-16T00:47:52.503025Z",
     "iopub.status.idle": "2022-05-16T00:47:52.515085Z",
     "shell.execute_reply": "2022-05-16T00:47:52.514228Z",
     "shell.execute_reply.started": "2022-05-16T00:47:52.503366Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1  key2\n",
       "0  one     1\n",
       "1  two     1\n",
       "3  two     2\n",
       "4  one     2\n",
       "6  one     3\n",
       "7  two     4"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 删除重复数据\n",
    "data.drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c75551f2-5dee-4639-a8a6-5455f168b3d3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-16T00:58:14.721533Z",
     "iopub.status.busy": "2022-05-16T00:58:14.721202Z",
     "iopub.status.idle": "2022-05-16T00:58:14.732988Z",
     "shell.execute_reply": "2022-05-16T00:58:14.731991Z",
     "shell.execute_reply.started": "2022-05-16T00:58:14.721506Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>key3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1  key2  key3\n",
       "0  one     1     0\n",
       "1  two     1     1\n",
       "2  one     1     2\n",
       "3  two     2     3\n",
       "4  one     2     4\n",
       "5  two     2     5\n",
       "6  one     3     6\n",
       "7  two     4     7\n",
       "8  two     4     8"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 改造数据，加一列\n",
    "data[\"key3\"] = np.arange(9)\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "13f896a2-51d8-4ee5-93b9-6f3531e8fa7f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-16T01:03:20.649004Z",
     "iopub.status.busy": "2022-05-16T01:03:20.648600Z",
     "iopub.status.idle": "2022-05-16T01:03:20.658500Z",
     "shell.execute_reply": "2022-05-16T01:03:20.657706Z",
     "shell.execute_reply.started": "2022-05-16T01:03:20.648975Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>key3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1  key2  key3\n",
       "0  one     1     0\n",
       "3  two     2     3\n",
       "6  one     3     6\n",
       "7  two     4     7"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 指定根据key2列进行去重操作\n",
    "data.drop_duplicates([\"key2\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "514d490c-723d-4fcf-a115-d37d49ef71ed",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-16T01:13:15.300224Z",
     "iopub.status.busy": "2022-05-16T01:13:15.299895Z",
     "iopub.status.idle": "2022-05-16T01:13:15.310847Z",
     "shell.execute_reply": "2022-05-16T01:13:15.309754Z",
     "shell.execute_reply.started": "2022-05-16T01:13:15.300198Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>key3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1  key2  key3\n",
       "1  two     1     1\n",
       "2  one     1     2\n",
       "4  one     2     4\n",
       "5  two     2     5\n",
       "6  one     3     6\n",
       "8  two     4     8"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 指定根据key1和key2列进行去重操作，并且保留租后一次出现的位置\n",
    "data.drop_duplicates([\"key1\", \"key2\"], keep=\"last\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
